load packages

library(tidyverse)
library(lme4)
library(lmerTest)
library(knitr)
library(MuMIn)
library(cowplot)
library(caret)
library(ROCR)

define color palettes

algorithm = c("#006989", "#FEC601", "#F43C13", "#00A5CF", "#00A878")
instruction = wesanderson::wes_palette("Darjeeling1", 2, "continuous")
craving = wesanderson::wes_palette("Darjeeling1", 3, "continuous")
rating = c("#00A08A", "#F2AD00", "#F98400", "#FF0000")
dc_bw = readRDS("~/dc_bw.Rds")

load tidied data

source("~/Documents/code/sanlab/DEV_scripts/behavioral/descriptives/ROC/load_data.R")

check ratings

Check if wrong buttons were used (i.e., not 5-8)

  • DEV001 = code normally
  • DEV011 = code normally
  • DEV016 = code normally
  • DEV017 = exclude; can’t tell if they’re missed ratings or incorrect placement of fingers
  • DEV019 = exclude; can’t tell if they’re missed ratings or incorrect placement of fingers
  • DEV020 = code normally
  • DEV022 = code normally
  • DEV028 = code normally
  • DEV032 = incorrect placement of fingers; recode runs 1-2 (LOOK INTO WTP)
  • DEV037 = exclude; technical error?
  • DEV054 = exclude; technical error?
  • DEV060 = code normally; task ended early
  • DEV061 = code normally; task ended early
  • DEV063 = code normally; task ended early
  • DEV069 = incorrect placement of fingers in run1
  • DEV075 = code normally
  • DEV082 = code normally
  • DEV083 = code normally
subs = data.all %>%
  group_by(subjectID, run, rating) %>%
  summarize(n = n()) %>%
  spread(rating, n) %>%
  mutate(messed = ifelse(is.na(`5`) & !is.na(`<NA>`), "yes", NA)) %>%
  filter(messed == "yes") %>% 
  ungroup() %>% 
  select(subjectID) %>% 
  unique()

data.all %>%
  group_by(subjectID, run, rating) %>%
  summarize(n = n()) %>%
  spread(rating, n) %>%
  mutate(messed = ifelse(is.na(`5`) & !is.na(`<NA>`), "yes", NA)) %>%
  filter(subjectID %in% subs$subjectID)

recode and exclude

Recoding
* DEV069: recode runs1

data.ex = data.all %>%
  mutate(rating = ifelse(subjectID == "DEV069" & run == "run1", rating - 1, rating),
         rating = ifelse(subjectID == "DEV069" & run == "run1" & is.na(rating), 8, rating),
         rating = rating - 4) %>%
  group_by(subjectID, wave) %>%
  arrange(subjectID, run) %>%
  mutate(trial = row_number())

load striping info

striping = read.csv("~/Documents/code/sanlab/DEV_scripts/fMRI/betaseries/ROC/striping_QC.csv")

load mean intensity values

file_dir = "~/Documents/code/sanlab/DEV_scripts/fMRI/betaseries/ROC/dotProducts_ROC/"
file_pattern = "DEV[0-9]{3}_meanIntensity.txt"
file_list = list.files(file_dir, pattern = file_pattern)

intensities = data.frame()

for (file in file_list) {
  temp = tryCatch(read.table(file.path(file_dir,file), fill = TRUE) %>%
                    rename("subjectID" = V1,
                           "meanIntensity" = V3) %>%
                    extract(V2, "beta", "beta_([0-9]{4}).nii") %>%
                    mutate(beta = as.integer(beta)), error = function(e) message(file))
  intensities = rbind(intensities, temp)
  rm(temp)
}

load dot products

file_dir = "~/Documents/code/sanlab/DEV_scripts/fMRI/betaseries/ROC/dotProducts_ROC/"
file_pattern = "DEV[0-9]{3}_dotProducts.txt"
file_list = list.files(file_dir, pattern = file_pattern)

dots = data.frame()

for (file in file_list) {
  temp = tryCatch(read.table(file.path(file_dir,file), fill = TRUE) %>%
                    rename("subjectID" = V1,
                           "map" = V3,
                           "dotProduct" = V4) %>%
                    extract(V2, "beta", "beta_([0-9]{4}).nii") %>%
                    extract(map, "algorithm", "(.*)_.*.nii") %>%
                    mutate(beta = as.integer(beta)), error = function(e) message(file))
  dots = rbind(dots, temp)
  rm(temp)
}

join intensities and dots

  • recode trials with extreme intensities as NA
dots.merged = dots %>%
  left_join(., intensities, by = c("subjectID", "beta")) %>%
  group_by(subjectID, algorithm) %>%
  mutate(rownum = row_number())

# plot original
dots.merged %>%
  filter(algorithm == "logistic") %>%
  ggplot(aes(1, meanIntensity)) +
    geom_boxplot()

# assess extreme values and exclude when calculating SDs
dots.merged %>%
  filter(algorithm == "logistic") %>%
  arrange(meanIntensity)
dots.merged %>%
  filter(algorithm == "logistic") %>%
  arrange(-meanIntensity)
# recode outliers as NA
dots.merged = dots.merged %>%
  ungroup() %>%
  mutate(meanIntensity = ifelse(meanIntensity > 1 | meanIntensity < -1, NA, meanIntensity),
         median = median(meanIntensity, na.rm = TRUE),
         sd3 = 3*sd(meanIntensity, na.rm = TRUE),
         outlier = ifelse(meanIntensity > median + sd3 | meanIntensity < median - sd3, "yes", "no"),
         dotProduct = ifelse(outlier == "yes", NA, dotProduct))
  
# plot after
dots.merged %>%
  filter(algorithm == "logistic") %>%
  ggplot(aes(1, meanIntensity)) +
    geom_boxplot()

recode subs

  • DEV022 = run4 has 8 trials
  • DEV037 = ???
  • DEV048 = run4 missing
  • DEV060 = run1 has 19 trials; couldn’t estimate run1 trial 19, run3 trial 20
  • DEV061 = run3 has 19 trials; couldn’t estimate run3 trial 19
  • DEV063 = run2 has 11 trials
  • DEV081 = run2 missing (run1 was run twice)
  • DEV082 = run2 has 15 trials; couldn’t estimate run1 trial 19, run1 trial 20
trial.numbers = data.frame(subjectID = c(rep("DEV060", 79), rep("DEV061", 79), rep("DEV063", 71), rep("DEV081", 80), rep("DEV082", 75)),
                           rownum = c(1:79, 1:79, 1:71, 1:80, 1:75),
                           trial = c(1:19, 21:80, 1:59, 61:80, 1:31, 41:80, 1:20, 41:80, 21:40, 1:35, 41:80))

dots.check = dots.merged %>%
  group_by(subjectID, algorithm) %>%
  mutate(rownum = row_number()) %>%
  left_join(., trial.numbers, by = c("subjectID", "rownum")) %>%
  mutate(trial = ifelse(is.na(trial), rownum, trial),
         dotProduct = ifelse(subjectID == "DEV060" & trial %in% 19:20, NA,
                      ifelse(subjectID == "DEV061" & trial == 59, NA,
                      ifelse(subjectID == "DEV082" & trial %in% 19:20, NA, dotProduct)))) %>%
  select(-rownum) %>%
  left_join(., striping, by = c("subjectID", "beta")) %>%
  mutate(dotProduct = ifelse(!is.na(striping), NA, dotProduct))

exclude outliers and standardize

  • standardize within subject and algorithm
dots.ex = dots.check %>%
  filter(!algorithm %in% c("ridge", "svm")) %>%
  group_by(subjectID, algorithm) %>% # standardize within sub and algorithm
  mutate(dotSTD = scale(dotProduct, center = FALSE)) 

merge data and exclude subs

Exclusions

  • MRI motion and data quality exclusions: DEV001, DEV020, DEV032, DEV047, DEV055, DEV064, DEV066
  • Button box exclusions: DEV017, DEV019, DEV037, DEV054
  • Run exclusions: DEV029 (run3), DEV037 (run1), DEV042 (run4), DEV067 (run4)

Other * select only craved trials

data = left_join(dots.ex, data.ex, by = c("subjectID", "trial")) %>%
  filter(!subjectID %in% c("DEV001","DEV020","DEV032","DEV047","DEV055","DEV064","DEV066", "DEV017", "DEV019", "DEV037", "DEV054")) %>%
  filter(!(subjectID == "DEV029" & run == "run3") & !(subjectID == "DEV037" & run == "run1") & !(subjectID == "DEV042" & run == "run4") & !(subjectID == "DEV067" & run == "run4")) %>%
  ungroup() %>%
  mutate(algorithm = ifelse(algorithm == "reg_look", "regulate > look", 
                     ifelse(algorithm == "reg", "regulate > rest", algorithm))) %>%

  filter(craving == "craved")

summarize

data %>%
  filter(algorithm == "logistic") %>%
  group_by(subjectID) %>%
  summarize(n = n()) %>%
  arrange(n)

roc

# roc curve
perf.df = data %>%
  mutate(instruction = ifelse(instruction == "regulate", 1, 0)) %>%
  group_by(algorithm) %>%
  do({
    algorithm = .$algorithm
    pred = prediction(.$dotSTD, .$instruction, label.ordering = NULL)
    perf = performance(pred, measure = "tpr", x.measure = "fpr")
    data.frame(cut=perf@alpha.values[[1]],fpr=perf@x.values[[1]],tpr=perf@y.values[[1]])
  })

ggplot(perf.df, aes(fpr, tpr, color = algorithm)) +
  geom_line() +
  scale_color_manual(values = algorithm) +
  xlab("false positive rate") +
  ylab("true positive rate")

# logistic
roc = data %>%
  filter(algorithm == "logistic") %>%
  select(subjectID, trial, instruction, rating, dotSTD) %>%
  mutate(guess.instruction = ifelse(dotSTD > 0, "regulate", "look"),
         guess.rating = ifelse(dotSTD > 0, "low", "high"),
         rating.bin = ifelse(rating >= 3, "high", "low"),
         instruction = as.factor(instruction),
         guess.instruction = as.factor(guess.instruction),
         rating.bin = as.factor(rating.bin),
         guess.rating = as.factor(guess.rating))

confusionMatrix(roc$guess.instruction, roc$instruction)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction look regulate
##   look      881      338
##   regulate  529     1073
##                                                
##                Accuracy : 0.6927               
##                  95% CI : (0.6753, 0.7097)     
##     No Information Rate : 0.5002               
##     P-Value [Acc > NIR] : < 0.00000000000000022
##                                                
##                   Kappa : 0.3853               
##                                                
##  Mcnemar's Test P-Value : 0.0000000001098      
##                                                
##             Sensitivity : 0.6248               
##             Specificity : 0.7605               
##          Pos Pred Value : 0.7227               
##          Neg Pred Value : 0.6698               
##              Prevalence : 0.4998               
##          Detection Rate : 0.3123               
##    Detection Prevalence : 0.4321               
##       Balanced Accuracy : 0.6926               
##                                                
##        'Positive' Class : look                 
## 
confusionMatrix(roc$guess.rating, roc$rating.bin)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction high low
##       high  836 343
##       low   768 750
##                                              
##                Accuracy : 0.5881             
##                  95% CI : (0.5692, 0.6067)   
##     No Information Rate : 0.5947             
##     P-Value [Acc > NIR] : 0.7661             
##                                              
##                   Kappa : 0.1953             
##                                              
##  Mcnemar's Test P-Value : <0.0000000000000002
##                                              
##             Sensitivity : 0.5212             
##             Specificity : 0.6862             
##          Pos Pred Value : 0.7091             
##          Neg Pred Value : 0.4941             
##              Prevalence : 0.5947             
##          Detection Rate : 0.3100             
##    Detection Prevalence : 0.4372             
##       Balanced Accuracy : 0.6037             
##                                              
##        'Positive' Class : high               
## 
# regulate > look
roc = data %>%
  filter(algorithm == "regulate > look") %>%
  select(subjectID, trial, instruction, rating, dotSTD) %>%
  mutate(guess.instruction = ifelse(dotSTD > 0, "regulate", "look"),
         guess.rating = ifelse(dotSTD > 0, "low", "high"),
         rating.bin = ifelse(rating >= 3, "high", "low"),
         instruction = as.factor(instruction),
         guess.instruction = as.factor(guess.instruction),
         rating.bin = as.factor(rating.bin),
         guess.rating = as.factor(guess.rating))

confusionMatrix(roc$guess.instruction, roc$instruction)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction look regulate
##   look      875      551
##   regulate  535      860
##                                              
##                Accuracy : 0.615              
##                  95% CI : (0.5968, 0.633)    
##     No Information Rate : 0.5002             
##     P-Value [Acc > NIR] : <0.0000000000000002
##                                              
##                   Kappa : 0.2301             
##                                              
##  Mcnemar's Test P-Value : 0.649              
##                                              
##             Sensitivity : 0.6206             
##             Specificity : 0.6095             
##          Pos Pred Value : 0.6136             
##          Neg Pred Value : 0.6165             
##              Prevalence : 0.4998             
##          Detection Rate : 0.3102             
##    Detection Prevalence : 0.5055             
##       Balanced Accuracy : 0.6150             
##                                              
##        'Positive' Class : look               
## 
confusionMatrix(roc$guess.rating, roc$rating.bin)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction high low
##       high  862 513
##       low   742 580
##                                           
##                Accuracy : 0.5347          
##                  95% CI : (0.5156, 0.5536)
##     No Information Rate : 0.5947          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0659          
##                                           
##  Mcnemar's Test P-Value : 0.0000000001227 
##                                           
##             Sensitivity : 0.5374          
##             Specificity : 0.5306          
##          Pos Pred Value : 0.6269          
##          Neg Pred Value : 0.4387          
##              Prevalence : 0.5947          
##          Detection Rate : 0.3196          
##    Detection Prevalence : 0.5098          
##       Balanced Accuracy : 0.5340          
##                                           
##        'Positive' Class : high            
## 

visualize

instruction

data %>%
  filter(!is.na(rating)) %>%
  ggplot(aes(algorithm, dotSTD, fill = instruction)) +
    stat_summary(fun.y = mean, geom = "bar", position = position_dodge(width = 0.95)) +
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", position = position_dodge(width = 0.95), width = 0) +
    scale_fill_manual(name = "", values = instruction) + 
    labs(y = "standardized regulation pattern expression value\n", x = "") + 
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  ggplot(aes(instruction, dotSTD)) +
    stat_summary(aes(group = subjectID), fun.y = mean, geom = "line", alpha = .1, size = .5) +
    stat_summary(aes(group = 1), fun.y = mean, geom = "line", size = .75) +
    stat_summary(aes(color = instruction), fun.data = "mean_cl_boot",  geom = "pointrange", width = 0, size = .75) + 
    facet_grid(~algorithm) +
    scale_color_manual(name = "", values = instruction) + 
    labs(y = "standardized regulation pattern expression value\n", x = "") + 
    dc_bw

rating

data %>%
  filter(!is.na(rating)) %>%
  ggplot(aes(algorithm, dotSTD, fill = as.factor(rating))) +
    stat_summary(fun.y = mean, geom = "bar", position = position_dodge(width = 0.95)) +
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", position = position_dodge(width = 0.95), width = 0) +
    scale_fill_manual(name = "", values = rating) + 
    facet_grid(~instruction) + 
    labs(y = "standardized regulation pattern expression value\n", x = "") + 
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  group_by(rating, algorithm, instruction) %>%
  mutate(n.obs = n()) %>%
  ggplot(aes(as.factor(rating), dotSTD, color = algorithm)) +
    stat_summary(aes(group = algorithm), fun.y = mean, geom = "line") +
    stat_summary(fun.data = "mean_cl_boot", geom = "linerange") +
    stat_summary(aes(size = n.obs), fun.y = mean, geom = "point") +
    scale_color_manual(name = "", values = algorithm) + 
    scale_size(name = "", range = c(1,4)) + 
    facet_grid(~instruction) + 
    labs(x = "\nrating", y = "standardized regulation pattern expression value\n") + 
    dc_bw

rating and instruction

data %>%
  filter(!is.na(rating)) %>%
  group_by(rating, algorithm, instruction) %>%
  mutate(n.obs = n()) %>%
  ggplot(aes(as.factor(rating), dotSTD, color = instruction)) +
    stat_summary(aes(group = instruction), fun.y = mean, geom = "line") +
    stat_summary(fun.data = "mean_cl_boot", geom = "linerange") +
    stat_summary(aes(size = n.obs), fun.y = mean, geom = "point") +
    scale_color_manual(name = "", values = instruction) + 
    scale_size(name = "", range = c(1,4)) + 
    facet_grid(~algorithm) + 
    labs(x = "\nrating", y = "standardized regulation pattern expression value\n") + 
    dc_bw

rating high v. low

data %>%
  filter(!is.na(rating)) %>%
  mutate(rating.bin = ifelse(rating >= 3, "high", "low")) %>%
  ggplot(aes(rating.bin, dotSTD)) +
    stat_summary(aes(group = subjectID), fun.y = mean, geom = "line", alpha = .1, size = .5) +
    stat_summary(aes(group = 1), fun.y = mean, geom = "line", size = .75) +
    stat_summary(aes(color = rating.bin), fun.data = mean_cl_boot, geom = "pointrange", width = 0, size = .75) + 
    facet_grid(~algorithm) +
    scale_color_manual(name = "", values = craving) + 
    coord_cartesian(ylim = c(-1, 1.25)) + 
    labs(y = "standardized regulation pattern expression value\n", x = "") + 
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  mutate(rating.bin = ifelse(rating >= 3, "high", "low")) %>%
  ggplot(aes(rating.bin, dotSTD)) +
    stat_summary(aes(group = interaction(subjectID, instruction), color = instruction), fun.y = mean, geom = "line", alpha = .1, size = .5) +
    stat_summary(aes(group = instruction, color = instruction), fun.y = mean, geom = "line", size = .75) +
    stat_summary(aes(color = instruction), fun.data = mean_cl_boot, geom = "pointrange", width = 0, size = .75) + 
    facet_grid(~algorithm) +
    scale_color_manual(name = "", values = instruction) + 
    coord_cartesian(ylim = c(-1.5, 2)) + 
    labs(y = "standardized regulation pattern expression value\n", x = "") + 
    dc_bw

RT

data %>%
  filter(!is.na(rating)) %>%
  ggplot(aes(rt, dotSTD, color = instruction)) +
    geom_smooth(method = "lm", alpha = .2) + 
    facet_grid(~algorithm) + 
    scale_color_manual(name = "", values = instruction) + 
    labs(x = "\nreaction time (seconds)", y = "standardized regulation pattern expression value\n") + 
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  filter(algorithm == "logistic") %>%
  ggplot(aes(as.factor(rating), rt, color = instruction)) +
    stat_summary(aes(group = instruction), fun.y = mean, geom = "line") +
    stat_summary(fun.data = mean_cl_boot, geom = "pointrange", width = 0) +
    facet_grid(~craving) +
    scale_color_manual(values = instruction) +
    labs(x = "\nrating", y = "reaction time (seconds)\n") + 
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  filter(algorithm == "logistic") %>%
  ggplot(aes(rt, dotSTD, color = as.factor(rating))) +
    geom_smooth(method = "lm", alpha = .1) + 
    facet_grid(~instruction) + 
    scale_color_manual(name = "rating", values = rating) + 
    labs(x = "\nreaction time (seconds)", y = "standardized regulation pattern expression value\n") + 
    dc_bw

individual diffs

task

data %>%
  filter(!is.na(rating)) %>%
  group_by(subjectID, algorithm, instruction, craving) %>%
  mutate(meanPEV = mean(dotProduct, na.rm = TRUE)) %>%
  ggplot(aes(instruction, meanPEV, color = instruction)) + 
    geom_boxplot() +
    facet_wrap(~algorithm, scales = "free", ncol = 4) +
    scale_color_manual(values = instruction) +
    labs(x = "", y = "mean regulation pattern expression value\n") +
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  group_by(subjectID, algorithm, instruction) %>%
  mutate(meanPEV = mean(dotProduct, na.rm = TRUE),
         meanRating = mean(rating, na.rm = TRUE)) %>%
  select(subjectID, algorithm, meanPEV, meanRating, instruction) %>%
  unique() %>%
  ggplot(aes(meanPEV, meanRating, color = instruction)) + 
    geom_point() +
    geom_smooth(method = "lm", se = .1) +
    facet_wrap(~algorithm, scales = "free", ncol = 4) +
    scale_color_manual(values = instruction) +
    labs(x = "\nmean regulation pattern expression value", y = "mean craving rating\n") +
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  group_by(subjectID, algorithm, instruction) %>%
  mutate(meanPEV = mean(dotProduct, na.rm = TRUE),
         meanRating = mean(rating, na.rm = TRUE),
         instruction1 = instruction) %>%
  select(subjectID, algorithm, meanPEV, meanRating, instruction, instruction1) %>%
  unique() %>%
  spread(instruction, meanRating) %>%
  group_by(subjectID, algorithm) %>%
  fill(everything(), .direction = "down") %>%
  fill(everything(), .direction = "up") %>%
  mutate(success = look - regulate,
         success.percent = ((look - regulate) / look) * 100) %>%
  spread(instruction1, meanPEV) %>%
  mutate(diff = regulate - look) %>%
  ggplot(aes(diff, success.percent)) + 
    geom_point() +
    geom_smooth(method = "lm", se = .1) +
    facet_wrap(~algorithm, scales = "free", ncol = 4) +
    #scale_color_manual(values = instruction) +
    labs(x = "\ndifference in mean regulation pattern expression value (regulate - look)", y = "percent change in craving (look - regulate / look)") +
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  group_by(subjectID, algorithm, instruction) %>%
  mutate(meanPEV = mean(dotProduct, na.rm = TRUE),
         meanRating = mean(rating, na.rm = TRUE),
         instruction1 = instruction) %>%
  select(subjectID, algorithm, meanPEV, meanRating, instruction, instruction1) %>%
  unique() %>%
  spread(instruction, meanRating) %>%
  group_by(subjectID, algorithm) %>%
  fill(everything(), .direction = "down") %>%
  fill(everything(), .direction = "up") %>%
  mutate(success = look - regulate,
         success.percent = ((look - regulate) / look) * 100) %>%
  spread(instruction1, meanPEV) %>%
  mutate(diff = regulate - look) %>% 
  ungroup() %>%
  nest(-algorithm) %>% 
  mutate(
    test = map(data, ~ cor.test(.x$success.percent, .x$diff)),
    tidied = map(test, broom::tidy)
  ) %>% 
  unnest(tidied, .drop = TRUE)

restraint

# load qualtrics
cred_file_location = '~/credentials.yaml.DEFAULT'
sid_column_name = 'Login|qid'
survey_name_filter = 'DEV Session 1 Surveys'
sid_pattern = 'DEV[0-9]{3}'
exclude_sid = '99|DEV737' # subject IDs to exclude

# load credential file
credentials = scorequaltrics::creds_from_file(cred_file_location)

# filter
surveysAvail = scorequaltrics::get_surveys(credentials)
surveysFiltered = filter(surveysAvail, grepl(survey_name_filter, SurveyName))

# get data
surveys_long = scorequaltrics::get_survey_data(surveysFiltered,
                                               credentials, 
                                               pid_col = sid_column_name) %>%
  mutate(Login = gsub("Dev", "DEV", Login),
         Login = gsub("dev", "DEV", Login),
         Login = ifelse(grepl("^[0-9]{3}$", Login), paste0("DEV", Login), Login),
         Login = ifelse(Login == "DEVO55", "DEV055", Login))

# check for repeats
repeat.subs = surveys_long %>%
  filter(item == "Finished" & value == "1") %>%
  filter(!grepl(exclude_sid, Login) & !is.na(Login)) %>%
  group_by(survey_name, Login) %>%
  summarize(n = n()) %>%
  filter(n > 1) %>%
  spread(survey_name, n)

# surveys_long %>%
#   filter(item == "StartDate") %>%
#   filter(Login %in% repeat.subs$Login) %>%
#   group_by(survey_name, Login) %>%
#   mutate(n = n()) %>%
#   filter(n > 1)

# filter out repeats
surveys = surveys_long %>%
  filter(Login %in% unique(data$subjectID)) %>%
  filter(!qid %in% c("R_1M01CRpEgQ9Sjzx", "R_3JfnLZ2XhekmvvF", "R_RUXzgKp7Sne7865")) %>%
  select(-qid) %>%
  rename("subjectID" = Login)

# get and score restraint scale
restraint = surveys %>%
  filter(grepl("RS", item)) %>%
  mutate(value = ifelse(value == "", NA, value),
         value = as.integer(value)) %>%
  group_by(subjectID) %>%
  summarize(restraint = sum(value, na.rm = TRUE))

# correlation with other measures
data %>%
  filter(!is.na(rating)) %>%
  group_by(subjectID, algorithm, instruction) %>%
  mutate(meanPEV = mean(dotProduct, na.rm = TRUE)) %>%
  select(subjectID, algorithm, meanPEV, instruction) %>%
  unique() %>%
  group_by(subjectID) %>%
  spread(instruction, meanPEV) %>%
  mutate(diff = regulate - look) %>%
  left_join(., restraint) %>%
  ggplot(aes(diff, restraint)) + 
    geom_point() +
    geom_smooth(method = "lm", se = .1) +
    facet_wrap(~algorithm, scales = "free", ncol = 3) +
    labs(x = "\ndifference in mean regulation pattern expression value (regulate - look)", y = "restraint score\n") +
    dc_bw

data %>%
  filter(!is.na(rating)) %>%
  group_by(subjectID, algorithm, instruction) %>%
  mutate(meanPEV = mean(dotProduct, na.rm = TRUE)) %>%
  select(subjectID, algorithm, meanPEV, instruction) %>%
  unique() %>%
  group_by(subjectID) %>%
  spread(instruction, meanPEV) %>%
  mutate(diff = regulate - look) %>%
  left_join(., restraint) %>% 
  ungroup() %>%
  nest(-algorithm) %>% 
  mutate(
    test = map(data, ~ cor.test(.x$restraint, .x$diff)),
    tidied = map(test, broom::tidy)
  ) %>% 
  unnest(tidied, .drop = TRUE)

correlations

cors = data %>%
  filter(!is.na(rating)) %>%
  group_by(subjectID, algorithm, instruction) %>%
  mutate(meanPEV = mean(dotProduct, na.rm = TRUE),
         meanRating = mean(rating, na.rm = TRUE)) %>%
  gather(variable, value, meanPEV, meanRating) %>%
  select(subjectID, instruction, algorithm, variable, value) %>%
  unique() %>%
  unite("instruction", c("variable", "instruction")) %>%
  ungroup() %>%
  select(subjectID, algorithm, instruction, value) %>%
  group_by(algorithm) %>%
  do({
    instruction.spread = spread(., instruction, value)
    cors = cor(instruction.spread[,-c(1:2)], use = "pairwise.complete.obs") %>%
      as.data.frame() %>%
      mutate(algorithm = instruction.spread$algorithm[[1]],
             instruction = colnames(instruction.spread)[-c(1:2)])
  })


cors %>%
  reshape2::melt() %>%
  ggplot(aes(instruction, variable, fill = value)) +
    geom_tile(color = "white") +
    scale_fill_gradientn(name = "correlation\n", colors = c("#3B9AB2", "white", "#F21A00"), limits = c(-1, 1), breaks = c(-1, 0, 1)) + 
    geom_text(aes(label = round(value, 2)), size = 3) + #family = "Futura Medium"
    #geom_text(aes(label = significant), size = 4, family = "Futura Medium", nudge_x = .3, nudge_y = .2) + 
    facet_wrap(~algorithm) +
    labs(x = "", y = "") + 
    dc_bw +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

specification curve analysis

models

# set na.action for dredge
options(na.action = "na.fail")

# tidy data
data.sca = data %>%
  select(subjectID, trial, algorithm, dotSTD, rating, instruction) %>%
  spread(algorithm, dotSTD) %>%
  na.omit()

# run full model
models = lmer(rating ~ instruction*logistic + instruction*`regulate > rest` + instruction*`regulate > look` + (1 | subjectID), data = data.sca)

# run all possible nested models
models.sca = dredge(models, rank = "AIC", extra = "BIC") %>%
  select(AIC, delta, BIC, df, logLik, weight, `(Intercept)`, instruction, everything())

# set AIC for null model you want to compare model AIC values to
null = models.sca %>%
  arrange(df) %>%
  filter(instruction == "+" & df == 4)

plot

# tidy for plotting
plot.data = models.sca %>%
  arrange(AIC) %>%
  mutate(specification = row_number(),
         better.fit = ifelse(AIC == null$AIC, "equal",
                      ifelse(AIC < null$AIC, "yes", "no")))

order = plot.data %>%
  arrange(AIC) %>%
  mutate(better.fit.num = ifelse(better.fit == "yes", 1, 0)) %>%
  gather(variable, value, -c(AIC, delta, BIC, df, logLik, weight, better.fit.num, specification, better.fit)) %>%
  filter(!is.na(value)) %>%
  group_by(variable) %>%
  mutate(order = sum(better.fit.num)) %>%
  select(variable, order) %>%
  unique()

# variables included in model
variable.names = names(select(plot.data, -starts_with("better"), -specification, -AIC, -BIC, -df, -logLik, -delta, -weight, -`(Intercept)`))

# plot top panel
a = plot.data %>%
  ggplot(aes(specification, AIC, color = better.fit)) +
    geom_point(shape = "|", size = 4, alpha = .75) +
    geom_hline(yintercept = null$AIC, linetype = "dashed", color = "#5BBCD6") +
    scale_color_manual(values = c("#5BBCD6", "black", "#F43C13")) +
    labs(x = "", y = "AIC\n") +
    theme_minimal(base_size = 14) +
    theme(legend.title = element_text(size = 10),
          legend.text = element_text(size = 9),
          axis.text = element_text(color = "black"),
          axis.line = element_line(colour = "black"),
          legend.position = "none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank())

# set plotting order for variables based on number of times it's included in better fitting models
b = plot.data %>%
  gather(variable, value, eval(variable.names)) %>%
  left_join(., order, by = "variable") %>%
  mutate(value = ifelse(!is.na(value), "|", ""),
         variable = gsub("`regulate > look`", "regulate > look", variable),
         variable = gsub("`regulate > rest`", "regulate > rest", variable),
         variable = gsub("regulate > look:instruction", "instruction:regulate > look", variable),
         variable = gsub("regulate > rest:instruction", "instruction:regulate > rest", variable),
         variable = gsub("instruction:", "instruction  x  ", variable)) %>%
  ggplot(aes(specification, reorder(variable, order), color = better.fit)) +
    geom_text(aes(label = value), alpha = .75) +
    scale_color_manual(values = c("#5BBCD6", "black", "#F43C13")) +
    labs(x = "\nspecification number", y = "variables\n") +
    theme_minimal(base_size = 14) +
    theme(legend.title = element_text(size = 10),
          legend.text = element_text(size = 9),
          axis.text = element_text(color = "black"),
          axis.line = element_line(colour = "black"),
          legend.position = "none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank())

(plot = plot_grid(a, b, ncol = 1, align = "v"))